library(tidyverse)
library(dplyr)
library(stringr)
library(svMisc)
library(ggplot2)
library(gtable)
library("ggpubr")
# Load the raw call log; log_data_saver keeps the data exactly as it was read.
log_data_saver <- read_csv("~/Desktop/PAE/github/data/logData.csv")

# Work on a copy with exact duplicate rows removed.
log_data <- unique(log_data_saver)

# NOTE(review): as written, the pattern and the replacement are the same
# single space, and str_replace() only touches the first match in each
# string -- confirm which whitespace character was meant to be replaced.
log_data[["logInfo"]] <- str_replace(log_data[["logInfo"]], " ", " ")

# Open the table in the viewer (this happens before the upper-casing below).
view(log_data)

# Upper-case logInfo.
log_data[["logInfo"]] <- toupper(log_data[["logInfo"]])
# Split a date vector into its day, month and year parts.
#
# date: a vector that format() is applied to with the "%d", "%m" and "%Y"
#       formats (e.g. a Date or POSIXct vector).
# Returns the three formatted results bound column-wise with cbind(); the
# columns are named day, month and year, and the values are text.
extractdate <- function(date) {
  specs <- c(day = "%d", month = "%m", year = "%Y")
  pieces <- lapply(specs, function(spec) format(date, format = spec))
  do.call(cbind, pieces)
}
# First call: day / month / year of callTime, produced by extractdate().
first_call <- extractdate(log_data$callTime)

# Last call: the same three parts, cut out of lastCallTime by character
# position (1-4 year, 6-7 month, 9-10 day).
# NOTE(review): presumably lastCallTime reads as "YYYY-MM-DD..." -- confirm.
last_year <- substr(log_data$lastCallTime, 1, 4)
last_month <- substr(log_data$lastCallTime, 6, 7)
last_day <- substr(log_data$lastCallTime, 9, 10)

# Append the new columns in this order:
# day, month, year, last_day, last_month, last_year.
log_data <- cbind(log_data, first_call, last_day, last_month, last_year)

# The appended columns hold text; convert each of them to numeric.
date_cols <- c("day", "last_day", "month", "last_month", "year", "last_year")
log_data[date_cols] <- lapply(log_data[date_cols], as.numeric)

# From here on, year and last_year hold a fractional year (year + month / 12).
log_data[["year"]] <- log_data[["year"]] + log_data[["month"]] / 12
log_data[["last_year"]] <-
  log_data[["last_year"]] + log_data[["last_month"]] / 12

# Caller lifetime: last call minus first call, in (fractional) years.
log_data[["caller_lifetime"]] <- log_data[["last_year"]] - log_data[["year"]]
Cmd+Option+I.
Cmd+Shift+K
In this section, we display some of the basic distributions of the data, including languages, total lifetime of a given caller, calls made, content accessed, and the ratio of content listened to calls made.
Based on the graphs below, we are nervous that some of the data is not random given the incredibly high ratio of content listened to calls made, but given that the data is distributed fairly evenly across the different graphs, we are unsure.
sum(content_ratio$n)
[1] 16445